/* GENERATED SOURCE. DO NOT MODIFY. */
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html#License
/*
**********************************************************************
*   Copyright (c) 2002-2016, International Business Machines
*   Corporation and others.  All Rights Reserved.
**********************************************************************
*/

package ohos.global.icu.text;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.SortedSet;
import java.util.TreeSet;

import ohos.global.icu.impl.Assert;
import ohos.global.icu.impl.RBBIDataWrapper;
import ohos.global.icu.lang.UCharacter;
import ohos.global.icu.lang.UProperty;
import ohos.global.icu.text.RBBIRuleBuilder.IntPair;

/**
 *  This class is part of the RBBI rule compiler.
 *  It builds the state transition table used by the RBBI runtime
 *  from the expression syntax tree generated by the rule scanner.
 *
 *  This class is part of the RBBI implementation only.
 *  There is no user-visible public API here.
 */
class RBBITableBuilder {

    //
    //  RBBIStateDescriptor - The DFA is initially constructed as a set of these descriptors,
    //                        one for each state.
    static class RBBIStateDescriptor {
        boolean      fMarked;
        int          fAccepting;
        int          fLookAhead;
        SortedSet<Integer> fTagVals;
        int          fTagsIdx;
        Set<RBBINode> fPositions;                 // Set of parse tree positions associated
                                                  //   with this state.  Unordered (it's a set).
                                                  //   UVector contents are RBBINode *

        int[]        fDtran;                      // Transitions out of this state.
                                                   //   indexed by input character
                                                   //   contents is int index of dest state
                                                   //   in RBBITableBuilder.fDStates

        RBBIStateDescriptor(int maxInputSymbol) {
            fTagVals = new TreeSet<>();
            fPositions = new HashSet<>();
            fDtran = new int[maxInputSymbol+1];    // fDtran needs to be pre-sized.
                                                    //   It is indexed by input symbols, and will
                                                    //   hold  the next state number for each
                                                    //   symbol.
        }
    }


    private  RBBIRuleBuilder  fRB;

    /** The array index into RBBIRuleBuilder.fTreeRoots for the parse tree to operate on. */
    private  int  fRootIx;

    /** D states (Aho's terminology). Index is state number. */
    private  List<RBBIStateDescriptor> fDStates;

    /** Synthesized safe table, a List of row arrays.  */
    private List<short[]>    fSafeTable;

    /** Map from rule number (fVal in look ahead nodes) to sequential lookahead index. */
    int[] fLookAheadRuleMap;

    //-----------------------------------------------------------------------------
    //
    //  Constructor    for RBBITableBuilder.
    //
    //                 rootNode is an index into the array of root nodes that is held by
    //                          the overall RBBIRuleBuilder.
    //-----------------------------------------------------------------------------
    RBBITableBuilder(RBBIRuleBuilder rb,  int rootNodeIx)  {
           fRootIx     = rootNodeIx;
           fRB         = rb;
           fDStates    = new ArrayList<>();
        }




       //-----------------------------------------------------------------------------
       //
       //   RBBITableBuilder::buildForwardTable  -  This is the main function for building
       //                          the DFA state transition table from the RBBI rules parse tree.
       //
       //-----------------------------------------------------------------------------
       void  buildForwardTable() {
           // If there were no rules, just return.  This situation can easily arise
           //   for the reverse rules.
           if (fRB.fTreeRoots[fRootIx]==null) {
               return;
           }

           //
           // Walk through the tree, replacing any references to $variables with a copy of the
           //   parse tree for the substition expression.
           //
           fRB.fTreeRoots[fRootIx] = fRB.fTreeRoots[fRootIx].flattenVariables();
           if (fRB.fDebugEnv!=null && fRB.fDebugEnv.indexOf("ftree")>=0) {
               System.out.println("Parse tree after flattening variable references.");
               fRB.fTreeRoots[fRootIx].printTree(true);
           }

           //
           // If the rules contained any references to {bof}
           //   add a {bof} <cat> <former root of tree> to the
           //   tree.  Means that all matches must start out with the
           //   {bof} fake character.
           //
           if (fRB.fSetBuilder.sawBOF()) {
               RBBINode bofTop    = new RBBINode(RBBINode.opCat);
               RBBINode bofLeaf   = new RBBINode(RBBINode.leafChar);
               bofTop.fLeftChild  = bofLeaf;
               bofTop.fRightChild = fRB.fTreeRoots[fRootIx];
               bofLeaf.fParent    = bofTop;
               bofLeaf.fVal       = 2;      // Reserved value for {bof}.
               fRB.fTreeRoots[fRootIx] = bofTop;
           }

           //
           // Add a unique right-end marker to the expression.
           //   Appears as a cat-node, left child being the original tree,
           //   right child being the end marker.
           //
           RBBINode cn = new RBBINode(RBBINode.opCat);
           cn.fLeftChild = fRB.fTreeRoots[fRootIx];
           fRB.fTreeRoots[fRootIx].fParent = cn;
           RBBINode endMarkerNode = cn.fRightChild = new RBBINode(RBBINode.endMark);
           cn.fRightChild.fParent = cn;
           fRB.fTreeRoots[fRootIx] = cn;

           //
           //  Replace all references to UnicodeSets with the tree for the equivalent
           //      expression.
           //
           fRB.fTreeRoots[fRootIx].flattenSets();
           if (fRB.fDebugEnv!=null && fRB.fDebugEnv.indexOf("stree")>=0) {
               System.out.println("Parse tree after flattening Unicode Set references.");
               fRB.fTreeRoots[fRootIx].printTree(true);
           }


           //
           // calculate the functions nullable, firstpos, lastpos and followpos on
           // nodes in the parse tree.
           //    See the alogrithm description in Aho.
           //    Understanding how this works by looking at the code alone will be
           //       nearly impossible.
           //
           calcNullable(fRB.fTreeRoots[fRootIx]);
           calcFirstPos(fRB.fTreeRoots[fRootIx]);
           calcLastPos(fRB.fTreeRoots[fRootIx]);
           calcFollowPos(fRB.fTreeRoots[fRootIx]);
           if (fRB.fDebugEnv!=null && fRB.fDebugEnv.indexOf("pos")>=0) {
               System.out.print("\n");
               printPosSets(fRB.fTreeRoots[fRootIx]);
           }

           //
           //  For "chained" rules, modify the followPos sets
           //
           if (fRB.fChainRules) {
               calcChainedFollowPos(fRB.fTreeRoots[fRootIx], endMarkerNode);
           }

           //
           //  BOF (start of input) test fixup.
           //
           if (fRB.fSetBuilder.sawBOF()) {
               bofFixup();
           }

           //
           // Build the DFA state transition tables.
           //
           buildStateTable();
           mapLookAheadRules();
           flagAcceptingStates();
           flagLookAheadStates();
           flagTaggedStates();

           //
           // Update the global table of rule status {tag} values
           // The rule builder has a global vector of status values that are common
           //    for all tables.  Merge the ones from this table into the global set.
           //
           mergeRuleStatusVals();
       }



       //-----------------------------------------------------------------------------
       //
       //   calcNullable.    Impossible to explain succinctly.  See Aho, section 3.9
       //
       //-----------------------------------------------------------------------------
       void calcNullable(RBBINode n) {
           if (n == null) {
               return;
           }
           if (n.fType == RBBINode.setRef ||
               n.fType == RBBINode.endMark ) {
               // These are non-empty leaf node types.
               n.fNullable = false;
               return;
           }

           if (n.fType == RBBINode.lookAhead || n.fType == RBBINode.tag) {
               // Lookahead marker node.  It's a leaf, so no recursion on children.
               // It's nullable because it does not match any literal text from the input stream.
               n.fNullable = true;
               return;
           }


           // The node is not a leaf.
           //  Calculate nullable on its children.
           calcNullable(n.fLeftChild);
           calcNullable(n.fRightChild);

           // Apply functions from table 3.40 in Aho
           if (n.fType == RBBINode.opOr) {
               n.fNullable = n.fLeftChild.fNullable || n.fRightChild.fNullable;
           }
           else if (n.fType == RBBINode.opCat) {
               n.fNullable = n.fLeftChild.fNullable && n.fRightChild.fNullable;
           }
           else if (n.fType == RBBINode.opStar || n.fType == RBBINode.opQuestion) {
               n.fNullable = true;
           }
           else {
               n.fNullable = false;
           }
       }




       //-----------------------------------------------------------------------------
       //
       //   calcFirstPos.    Impossible to explain succinctly.  See Aho, section 3.9
       //
       //-----------------------------------------------------------------------------
       void calcFirstPos(RBBINode n) {
           if (n == null) {
               return;
           }
           if (n.fType == RBBINode.leafChar  ||
               n.fType == RBBINode.endMark   ||
               n.fType == RBBINode.lookAhead ||
               n.fType == RBBINode.tag) {
               // These are non-empty leaf node types.
               n.fFirstPosSet.add(n);
               return;
           }

           // The node is not a leaf.
           //  Calculate firstPos on its children.
           calcFirstPos(n.fLeftChild);
           calcFirstPos(n.fRightChild);

           // Apply functions from table 3.40 in Aho
           if (n.fType == RBBINode.opOr) {
               n.fFirstPosSet.addAll(n.fLeftChild.fFirstPosSet);
               n.fFirstPosSet.addAll(n.fRightChild.fFirstPosSet);
           }
           else if (n.fType == RBBINode.opCat) {
               n.fFirstPosSet.addAll(n.fLeftChild.fFirstPosSet);
               if (n.fLeftChild.fNullable) {
                   n.fFirstPosSet.addAll(n.fRightChild.fFirstPosSet);
               }
           }
           else if (n.fType == RBBINode.opStar ||
                    n.fType == RBBINode.opQuestion ||
                    n.fType == RBBINode.opPlus) {
               n.fFirstPosSet.addAll(n.fLeftChild.fFirstPosSet);
           }
       }



       //-----------------------------------------------------------------------------
       //
       //   calcLastPos.    Impossible to explain succinctly.  See Aho, section 3.9
       //
       //-----------------------------------------------------------------------------
       void calcLastPos(RBBINode n) {
           if (n == null) {
               return;
           }
           if (n.fType == RBBINode.leafChar  ||
               n.fType == RBBINode.endMark   ||
               n.fType == RBBINode.lookAhead ||
               n.fType == RBBINode.tag) {
               // These are non-empty leaf node types.
               n.fLastPosSet.add(n);
               return;
           }

           // The node is not a leaf.
           //  Calculate lastPos on its children.
           calcLastPos(n.fLeftChild);
           calcLastPos(n.fRightChild);

           // Apply functions from table 3.40 in Aho
           if (n.fType == RBBINode.opOr) {
               n.fLastPosSet.addAll(n.fLeftChild.fLastPosSet);
               n.fLastPosSet.addAll(n.fRightChild.fLastPosSet);
           }
           else if (n.fType == RBBINode.opCat) {
               n.fLastPosSet.addAll(n.fRightChild.fLastPosSet);
               if (n.fRightChild.fNullable) {
                   n.fLastPosSet.addAll(n.fLeftChild.fLastPosSet);
               }
           }
           else if (n.fType == RBBINode.opStar     ||
                    n.fType == RBBINode.opQuestion ||
                    n.fType == RBBINode.opPlus) {
               n.fLastPosSet.addAll(n.fLeftChild.fLastPosSet);
           }
       }



       //-----------------------------------------------------------------------------
       //
       //   calcFollowPos.    Impossible to explain succinctly.  See Aho, section 3.9
       //
       //-----------------------------------------------------------------------------
       void calcFollowPos(RBBINode n) {
           if (n == null ||
               n.fType == RBBINode.leafChar ||
               n.fType == RBBINode.endMark) {
               return;
           }

           calcFollowPos(n.fLeftChild);
           calcFollowPos(n.fRightChild);

           // Aho rule #1
           if (n.fType == RBBINode.opCat) {
               for (RBBINode i /* is 'i' in Aho's description */ : n.fLeftChild.fLastPosSet) {
                   i.fFollowPos.addAll(n.fRightChild.fFirstPosSet);
               }
           }

           // Aho rule #2
           if (n.fType == RBBINode.opStar ||
               n.fType == RBBINode.opPlus) {
               for (RBBINode i /* again, n and i are the names from Aho's description */ : n.fLastPosSet) {
                   i.fFollowPos.addAll(n.fFirstPosSet);
               }
           }
       }

       //-----------------------------------------------------------------------------
       //
       //           addRuleRootNodes    Recursively walk a parse tree, adding all nodes flagged
       //                               as roots of a rule to a destination vector.
       //
       //-----------------------------------------------------------------------------
       void addRuleRootNodes(List<RBBINode> dest, RBBINode node) {
           if (node == null) {
               return;
           }
           if (node.fRuleRoot) {
               dest.add(node);
               // Note: rules cannot nest. If we found a rule start node,
               //       no child node can also be a start node.
               return;
           }
           addRuleRootNodes(dest, node.fLeftChild);
           addRuleRootNodes(dest, node.fRightChild);
       }

       //-----------------------------------------------------------------------------
       //
       //   calcChainedFollowPos.    Modify the previously calculated followPos sets
       //                            to implement rule chaining.  NOT described by Aho
       //
       //-----------------------------------------------------------------------------
       void calcChainedFollowPos(RBBINode tree, RBBINode endMarkNode) {

           List<RBBINode> leafNodes      = new ArrayList<>();

           // get a list all leaf nodes
           tree.findNodes(leafNodes, RBBINode.leafChar);

           // Collect all leaf nodes that can start matches for rules
           // with inbound chaining enabled, which is the union of the
           // firstPosition sets from each of the rule root nodes.

           List<RBBINode> ruleRootNodes = new ArrayList<>();
           addRuleRootNodes(ruleRootNodes, tree);

           Set<RBBINode> matchStartNodes = new HashSet<>();
           for (RBBINode node: ruleRootNodes) {
               if (node.fChainIn) {
                   matchStartNodes.addAll(node.fFirstPosSet);
               }
           }

           // Iterate over all leaf nodes,
           //
           for (RBBINode endNode : leafNodes) {

               // Identify leaf nodes that correspond to overall rule match positions.
               //   These include the endMarkNode in their followPos sets.
               //
               // Note: do not consider other end marker nodes, those that are added to
               //       look-ahead rules. These can't chain; a match immediately stops
               //       further matching. This leaves exactly one end marker node, the one
               //       at the end of the complete tree.

               if (!endNode.fFollowPos.contains(endMarkNode)) {
                   continue;
               }

               // We've got a node that can end a match.

               // !!LBCMNoChain implementation:  If this node's val correspond to
               // the Line Break $CM char class, don't chain from it.
               // TODO:  Remove this. !!LBCMNoChain is deprecated, and is not used
               //             by any of the standard ICU rules.
               if (fRB.fLBCMNoChain) {
                   int c = this.fRB.fSetBuilder.getFirstChar(endNode.fVal);
                   if (c != -1) {
                       // c == -1 occurs with sets containing only the {eof} marker string.
                       int cLBProp = UCharacter.getIntPropertyValue(c, UProperty.LINE_BREAK);
                       if (cLBProp == UCharacter.LineBreak.COMBINING_MARK) {
                           continue;
                       }
                   }
               }


               // Now iterate over the nodes that can start a match, looking for ones
               //   with the same char class as our ending node.
               for (RBBINode startNode : matchStartNodes) {
                   if (startNode.fType != RBBINode.leafChar) {
                       continue;
                   }

                   if (endNode.fVal == startNode.fVal) {
                       // The end val (character class) of one possible match is the
                       //   same as the start of another.

                       // Add all nodes from the followPos of the start node to the
                       //  followPos set of the end node, which will have the effect of
                       //  letting matches transition from a match state at endNode
                       //  to the second char of a match starting with startNode.
                       endNode.fFollowPos.addAll(startNode.fFollowPos);
                   }
               }
           }
       }


       //-----------------------------------------------------------------------------
       //
       //   bofFixup.    Fixup for state tables that include {bof} beginning of input testing.
       //                Do an swizzle similar to chaining, modifying the followPos set of
       //                the bofNode to include the followPos nodes from other {bot} nodes
       //                scattered through the tree.
       //
       //                This function has much in common with calcChainedFollowPos().
       //
       //-----------------------------------------------------------------------------
       void bofFixup() {
           //
           //   The parse tree looks like this ...
           //         fTree root  --.       <cat>
           //                               /     \
           //                            <cat>   <#end node>
           //                           /     \
           //                     <bofNode>   rest
           //                               of tree
           //
           //    We will be adding things to the followPos set of the <bofNode>
           //
           RBBINode  bofNode = fRB.fTreeRoots[fRootIx].fLeftChild.fLeftChild;
           Assert.assrt(bofNode.fType == RBBINode.leafChar);
           Assert.assrt(bofNode.fVal == 2);

           // Get all nodes that can be the start a match of the user-written rules
           //  (excluding the fake bofNode)
           //  We want the nodes that can start a match in the
           //     part labeled "rest of tree"
           //
           Set<RBBINode> matchStartNodes = fRB.fTreeRoots[fRootIx].fLeftChild.fRightChild.fFirstPosSet;
           for (RBBINode startNode : matchStartNodes) {
               if (startNode.fType != RBBINode.leafChar) {
                   continue;
               }

               if (startNode.fVal == bofNode.fVal) {
                   //  We found a leaf node corresponding to a {bof} that was
                   //    explicitly written into a rule.
                   //  Add everything from the followPos set of this node to the
                   //    followPos set of the fake bofNode at the start of the tree.
                   //
                   bofNode.fFollowPos.addAll(startNode.fFollowPos);
               }
           }
       }

       //-----------------------------------------------------------------------------
       //
       //   buildStateTable()    Determine the set of runtime DFA states and the
       //                        transition tables for these states, by the algorithm
       //                        of fig. 3.44 in Aho.
       //
       //                        Most of the comments are quotes of Aho's psuedo-code.
       //
       //-----------------------------------------------------------------------------
       void buildStateTable() {
           //
           // Add a dummy state 0 - the stop state.  Not from Aho.
           int      lastInputSymbol = fRB.fSetBuilder.getNumCharCategories() - 1;
           RBBIStateDescriptor failState = new RBBIStateDescriptor(lastInputSymbol);
           fDStates.add(failState);

           // initially, the only unmarked state in Dstates is firstpos(root),
           //       where toot is the root of the syntax tree for (r)#;
           RBBIStateDescriptor initialState = new RBBIStateDescriptor(lastInputSymbol);
           initialState.fPositions.addAll(fRB.fTreeRoots[fRootIx].fFirstPosSet);
           fDStates.add(initialState);

           // while there is an unmarked state T in Dstates do begin
           for (;;) {
               RBBIStateDescriptor T = null;
               int              tx;
               for (tx=1; tx<fDStates.size(); tx++) {
                   RBBIStateDescriptor temp  = fDStates.get(tx);
                   if (temp.fMarked == false) {
                       T = temp;
                       break;
                   }
               }
               if (T == null) {
                   break;
               }

               // mark T;
               T.fMarked = true;

               // for each input symbol a do begin
               int  a;
               for (a = 1; a<=lastInputSymbol; a++) {
                   // let U be the set of positions that are in followpos(p)
                   //    for some position p in T
                   //    such that the symbol at position p is a;
                   Set<RBBINode> U = null;
                   for (RBBINode p : T.fPositions) {
                       if ((p.fType == RBBINode.leafChar) &&  (p.fVal == a)) {
                           if (U == null) {
                               U = new HashSet<>();
                           }
                           U.addAll(p.fFollowPos);
                       }
                   }

                   // if U is not empty and not in DStates then
                   int  ux = 0;
                   boolean    UinDstates = false;
                   if (U != null) {
                       Assert.assrt(U.size() > 0);
                       int  ix;
                       for (ix=0; ix<fDStates.size(); ix++) {
                           RBBIStateDescriptor temp2;
                           temp2 = fDStates.get(ix);
                           if (U.equals(temp2.fPositions)) {
                               U  = temp2.fPositions;
                               ux = ix;
                               UinDstates = true;
                               break;
                           }
                       }

                       // Add U as an unmarked state to Dstates
                       if (!UinDstates)
                       {
                           RBBIStateDescriptor newState = new RBBIStateDescriptor(lastInputSymbol);
                           newState.fPositions = U;
                           fDStates.add(newState);
                           ux = fDStates.size()-1;
                       }

                       // Dtran[T, a] := U;
                       T.fDtran[a] = ux;
                   }
               }
           }
       }

      /**
       * mapLookAheadRules
       *
       */
      void mapLookAheadRules() {
          fLookAheadRuleMap =  new int[fRB.fScanner.numRules() + 1];
          int laSlotsInUse = 0;

          for (RBBIStateDescriptor sd: fDStates) {
              int laSlotForState = 0;

              // Establish the look-ahead slot for this state, if the state covers
              // any look-ahead nodes - corresponding to the '/' in look-ahead rules.

              // If any of the look-ahead nodes already have a slot assigned, use it,
              // otherwise assign a new one.

              boolean sawLookAheadNode = false;
              for (RBBINode node: sd.fPositions) {
                  if (node.fType != RBBINode.lookAhead) {
                      continue;
                  }
                  sawLookAheadNode = true;
                  int ruleNum = node.fVal;     // Set when rule was originally parsed.
                  assert(ruleNum < fLookAheadRuleMap.length);
                  assert(ruleNum > 0);
                  int laSlot = fLookAheadRuleMap[ruleNum];
                  if (laSlot != 0) {
                      if (laSlotForState == 0) {
                          laSlotForState = laSlot;
                      } else {
                          // TODO: figure out if this can fail, change to setting an error code if so.
                          assert(laSlot == laSlotForState);
                      }
                  }
              }
              if (!sawLookAheadNode) {
                  continue;
              }

              if (laSlotForState == 0) {
                  laSlotForState = ++laSlotsInUse;
              }

              // For each look ahead node covered by this state,
              // set the mapping from the node's rule number to the look ahead slot.
              // There can be multiple nodes/rule numbers going to the same la slot.

              for (RBBINode node: sd.fPositions) {
                  if (node.fType != RBBINode.lookAhead) {
                      continue;
                  }
                  int ruleNum = node.fVal;     // Set when rule was originally parsed.
                  int existingVal = fLookAheadRuleMap[ruleNum];
                  assert(existingVal == 0 || existingVal == laSlotForState);
                  fLookAheadRuleMap[ruleNum] = laSlotForState;
              }
          }

      }

       //-----------------------------------------------------------------------------
       //
       //   flagAcceptingStates    Identify accepting states.
       //                          First get a list of all of the end marker nodes.
       //                          Then, for each state s,
       //                              if s contains one of the end marker nodes in its list of tree positions then
       //                                  s is an accepting state.
       //
       //-----------------------------------------------------------------------------
       void     flagAcceptingStates() {
           List<RBBINode> endMarkerNodes = new ArrayList<>();
           RBBINode    endMarker;
           int     i;
           int     n;

           fRB.fTreeRoots[fRootIx].findNodes(endMarkerNodes, RBBINode.endMark);

           for (i=0; i<endMarkerNodes.size(); i++) {
               endMarker = endMarkerNodes.get(i);
               for (n=0; n<fDStates.size(); n++) {
                   RBBIStateDescriptor sd = fDStates.get(n);
                   //if (sd.fPositions.indexOf(endMarker) >= 0) {
                   if (sd.fPositions.contains(endMarker)) {
                       // Any non-zero value for fAccepting means this is an accepting node.
                       // The value is what will be returned to the user as the break status.
                       // If no other value was specified, force it to -1.

                       if (sd.fAccepting==0) {
                           // State hasn't been marked as accepting yet.  Do it now.
                           sd.fAccepting = fLookAheadRuleMap[endMarker.fVal];
                           if (sd.fAccepting == 0) {
                               sd.fAccepting = -1;
                           }
                       }
                       if (sd.fAccepting==-1 && endMarker.fVal != 0) {
                           // Both lookahead and non-lookahead accepting for this state.
                           // Favor the look-ahead, because a look-ahead match needs to
                           // immediately stop the run-time engine. First match, not longest.
                           sd.fAccepting = fLookAheadRuleMap[endMarker.fVal];
                       }
                       // implicit else:
                       // if sd.fAccepting already had a value other than 0 or -1, leave it be.
                   }
               }
           }
       }


       //-----------------------------------------------------------------------------
       //
       //    flagLookAheadStates   Very similar to flagAcceptingStates, above.
       //
       //-----------------------------------------------------------------------------
       void     flagLookAheadStates() {
           List<RBBINode> lookAheadNodes = new ArrayList<>();
           RBBINode    lookAheadNode;
           int     i;
           int     n;

           fRB.fTreeRoots[fRootIx].findNodes(lookAheadNodes, RBBINode.lookAhead);
           for (i=0; i<lookAheadNodes.size(); i++) {
               lookAheadNode = lookAheadNodes.get(i);
               for (n=0; n<fDStates.size(); n++) {
                   RBBIStateDescriptor sd = fDStates.get(n);
                   if (sd.fPositions.contains(lookAheadNode)) {
                       int lookaheadSlot = fLookAheadRuleMap[lookAheadNode.fVal];
                       assert(sd.fLookAhead == 0 || sd.fLookAhead == lookaheadSlot);
                       sd.fLookAhead = lookaheadSlot;
                   }
               }
           }
       }




       //-----------------------------------------------------------------------------
       //
       //    flagTaggedStates
       //
       //-----------------------------------------------------------------------------
       void     flagTaggedStates() {
           List<RBBINode> tagNodes = new ArrayList<>();
           RBBINode    tagNode;
           int     i;
           int     n;

           fRB.fTreeRoots[fRootIx].findNodes(tagNodes, RBBINode.tag);
           for (i=0; i<tagNodes.size(); i++) {                   // For each tag node t (all of 'em)
               tagNode = tagNodes.get(i);

               for (n=0; n<fDStates.size(); n++) {              //    For each state  s (row in the state table)
                   RBBIStateDescriptor sd = fDStates.get(n);
                   if (sd.fPositions.contains(tagNode)) {       //       if  s include the tag node t
                       sd.fTagVals.add(Integer.valueOf(tagNode.fVal));
                   }
               }
           }
       }



       //-----------------------------------------------------------------------------
       //
       //  mergeRuleStatusVals
       //
       //      Allocate positions in the  global array of rule status {tag} values
       //
       //      The RBBI runtime uses an array of {sets of status values} that can
       //      be returned for boundaries.  Each accepting state that has non-zero
       //      status includes an index into this array.  The format of the array
       //      is
       //           Num of status values in group 1
       //              status val
       //              status val
       //              ...
       //           Num of status vals in group 2
       //              status val
       //              status val
       //              ...
       //           etc.
       //
       //
       //-----------------------------------------------------------------------------

       void  mergeRuleStatusVals() {
           //
           //  The basic outline of what happens here is this...
           //
           //    for each state in this state table
           //       if the status tag list for this state is in the global statuses list
           //           record where and
           //           continue with the next state
           //       else
           //           add the tag list for this state to the global list.
           //
           int n;

           // Pre-load a single tag of {0} into the table.
           //   We will need this as a default, for rule sets with no explicit tagging,
           //   or with explicit tagging of {0}.
           if (fRB.fRuleStatusVals.size() == 0) {
               fRB.fRuleStatusVals.add(Integer.valueOf(1));    // Num of statuses in group
               fRB.fRuleStatusVals.add(Integer.valueOf(0));    //   and our single status of zero

               SortedSet<Integer> s0 = new TreeSet<>();        // mapping for rules with no explicit tagging
               fRB.fStatusSets.put(s0, Integer.valueOf(0));    //   (key is an empty set).

               SortedSet<Integer> s1 = new TreeSet<>();        // mapping for rules with explicit tagging of {0}
               s1.add(Integer.valueOf(0));
               fRB.fStatusSets.put(s1, Integer.valueOf(0));
           }

           //    For each state, check whether the state's status tag values are
           //       already entered into the status values array, and add them if not.
           for (n=0; n<fDStates.size(); n++) {
               RBBIStateDescriptor sd = fDStates.get(n);
               Set<Integer> statusVals = sd.fTagVals;
               Integer arrayIndexI = fRB.fStatusSets.get(statusVals);
               if (arrayIndexI == null) {
                   // This is the first encounter of this set of status values.
                   //   Add them to the statusSets map, This map associates
                   //   the set of status values with an index in the runtime status
                   //   values array.
                   arrayIndexI = Integer.valueOf(fRB.fRuleStatusVals.size());
                   fRB.fStatusSets.put(statusVals, arrayIndexI);

                   // Add the new set of status values to the vector of values that
                   //   will eventually become the array used by the runtime engine.
                   fRB.fRuleStatusVals.add(Integer.valueOf(statusVals.size()));
                   fRB.fRuleStatusVals.addAll(statusVals);
               }

               // Save the runtime array index back into the state descriptor.
               sd.fTagsIdx = arrayIndexI.intValue();
           }
       }







       //-----------------------------------------------------------------------------
       //
       //  printPosSets   Debug function.  Dump Nullable, firstpos, lastpos and followpos
       //                 for each node in the tree.
       //
       //-----------------------------------------------------------------------------

       void printPosSets(RBBINode n) {
           if (n==null) {
               return;
           }
           RBBINode.printNode(n);
           System.out.print("         Nullable:  " + n.fNullable);

           System.out.print("         firstpos:  ");
           printSet(n.fFirstPosSet);

           System.out.print("         lastpos:   ");
           printSet(n.fLastPosSet);

           System.out.print("         followpos: ");
           printSet(n.fFollowPos);

           printPosSets(n.fLeftChild);
           printPosSets(n.fRightChild);
       }



       /**
        *  Find duplicate (redundant) character classes. Begin looking with categories.first.
        *  Duplicates, if found are returned in the categories parameter.
        *  This is an iterator-like function, used to identify character classes
        *  (state table columns) that can be eliminated.
        *  @param categories in/out parameter, specifies where to start looking for duplicates,
        *                and returns the first pair of duplicates found, if any.
        *  @return true if duplicate char classes were found, false otherwise.
        *  @hide draft / provisional / internal are hidden on OHOS
        */
       boolean findDuplCharClassFrom(RBBIRuleBuilder.IntPair categories) {
           int numStates = fDStates.size();
           int numCols = fRB.fSetBuilder.getNumCharCategories();

           int table_base = 0;
           int table_dupl = 0;
           for (; categories.first < numCols-1; ++categories.first) {
               for (categories.second=categories.first+1; categories.second < numCols; ++categories.second) {
                   for (int state=0; state<numStates; state++) {
                       RBBIStateDescriptor sd = fDStates.get(state);
                       table_base = sd.fDtran[categories.first];
                       table_dupl = sd.fDtran[categories.second];
                       if (table_base != table_dupl) {
                           break;
                       }
                   }
                   if (table_base == table_dupl) {
                       return true;
                   }
               }
           }
           return false;
       }

       /**
        * Remove a column from the state table. Used when two character categories
        * have been found equivalent, and merged together, to eliminate the unneeded table column.
        */
       void removeColumn(int column) {
           int numStates = fDStates.size();
           for (int state=0; state<numStates; state++) {
               RBBIStateDescriptor sd = fDStates.get(state);
               assert(column < sd.fDtran.length);
               int[] newArray = Arrays.copyOf(sd.fDtran, sd.fDtran.length - 1);
               System.arraycopy(sd.fDtran, column+1, newArray, column, newArray.length - column);
               sd.fDtran = newArray;
           }
       }


       /**
        *  Find duplicate (redundant) states, beginning at the specified pair,
        *  within this state table. This is an iterator-like function, used to
        *  identify states (state table rows) that can be eliminated.
        *  @param states in/out parameter, specifies where to start looking for duplicates,
        *                and returns the first pair of duplicates found, if any.
        *  @return true if duplicate states were found, false otherwise.
        *  @hide draft / provisional / internal are hidden on OHOS
        */
       boolean findDuplicateState(RBBIRuleBuilder.IntPair states) {
           int numStates = fDStates.size();
           int numCols = fRB.fSetBuilder.getNumCharCategories();

           for (; states.first<numStates-1; ++states.first) {
               RBBIStateDescriptor firstSD = fDStates.get(states.first);
               for (states.second=states.first+1; states.second<numStates; ++states.second) {
                   RBBIStateDescriptor duplSD = fDStates.get(states.second);
                   if (firstSD.fAccepting != duplSD.fAccepting ||
                           firstSD.fLookAhead != duplSD.fLookAhead ||
                           firstSD.fTagsIdx   != duplSD.fTagsIdx) {
                       continue;
                   }
                   boolean rowsMatch = true;
                   for (int col=0; col < numCols; ++col) {
                       int firstVal = firstSD.fDtran[col];
                       int duplVal = duplSD.fDtran[col];
                       if (!((firstVal == duplVal) ||
                               ((firstVal == states.first || firstVal == states.second) &&
                                       (duplVal  == states.first || duplVal  == states.second)))) {
                           rowsMatch = false;
                           break;
                       }
                   }
                   if (rowsMatch) {
                       return true;
                   }
               }
           }
           return false;
       }

       /**
        *  Find the next duplicate state in the safe reverse table. An iterator function.
        *  @param states in/out parameter, specifies where to start looking for duplicates,
        *                and returns the first pair of duplicates found, if any.
        *  @return true if duplicate states were found, false otherwise.
        *  @hide draft / provisional / internal are hidden on OHOS
        */
       boolean findDuplicateSafeState(RBBIRuleBuilder.IntPair states) {
           int numStates = fSafeTable.size();

           for (; states.first<numStates-1; ++states.first) {
               short[] firstRow = fSafeTable.get(states.first);
               for (states.second=states.first+1; states.second<numStates; ++states.second) {
                   short[] duplRow = fSafeTable.get(states.second);
                   boolean rowsMatch = true;
                   int numCols = firstRow.length;
                   for (int col=0; col < numCols; ++col) {
                       int firstVal = firstRow[col];
                       int duplVal = duplRow[col];
                       if (!((firstVal == duplVal) ||
                               ((firstVal == states.first || firstVal == states.second) &&
                                       (duplVal  == states.first || duplVal  == states.second)))) {
                           rowsMatch = false;
                           break;
                       }
                   }
                   if (rowsMatch) {
                       return true;
                   }
               }
           }
           return false;
       }

       /**
        * Remove a duplicate state (row) from the state table. All references to the deleted (second) state
        * are redirected to first state.
        * @param duplStates The duplicate pair of states.
        * @hide draft / provisional / internal are hidden on OHOS
        */
       void removeState(IntPair duplStates) {
           final int keepState = duplStates.first;
           final int duplState = duplStates.second;
           assert(keepState < duplState);
           assert(duplState < fDStates.size());

           fDStates.remove(duplState);

           int numStates = fDStates.size();
           int numCols = fRB.fSetBuilder.getNumCharCategories();
           for (int state=0; state<numStates; ++state) {
               RBBIStateDescriptor sd = fDStates.get(state);
               for (int col=0; col<numCols; col++) {
                   int existingVal = sd.fDtran[col];
                   int newVal = existingVal;
                   if (existingVal == duplState) {
                       newVal = keepState;
                   } else if (existingVal > duplState) {
                       newVal = existingVal - 1;
                   }
                   sd.fDtran[col] = newVal;
               }
           }
       }

       /**
        * Remove a duplicate state from the safe table.
        * @param duplStates The duplicate pair of states.  The first is kept, the second is removed.
        *                   All references to the second in the state table are retargeted
        *                   to the first.
        * @hide draft / provisional / internal are hidden on OHOS
        */
       void removeSafeState(IntPair duplStates) {
           final int keepState = duplStates.first;
           final int duplState = duplStates.second;
           assert(keepState < duplState);
           assert(duplState < fSafeTable.size());

           fSafeTable.remove(duplState);
           int numStates = fSafeTable.size();
           for (int state=0; state<numStates; ++state) {
               short[] row = fSafeTable.get(state);
               for (int col=0; col<row.length; col++) {
                   int existingVal = row[col];
                   int newVal = existingVal;
                   if (existingVal == duplState) {
                       newVal = keepState;
                   } else if (existingVal > duplState) {
                       newVal = existingVal - 1;
                   }
                   row[col] = (short)newVal;
               }
           }
       }


       /**
        *  Check for, and remove duplicate states (table rows).
        *  @return the number of states removed.
        *  @hide draft / provisional / internal are hidden on OHOS
        */
       int removeDuplicateStates() {
           IntPair dupls = new IntPair(3, 0);
           int numStatesRemoved = 0;

           while (findDuplicateState(dupls)) {
               // System.out.printf("Removing duplicate states (%d, %d)\n", dupls.first, dupls.second);
               removeState(dupls);
               ++numStatesRemoved;
           }
           return numStatesRemoved;
       }


       /**
        *  Calculate the size in bytes of the serialized form of this state transition table,
        *  which is identical to the ICU4C runtime form.
        *  Refer to common/rbbidata.h from ICU4C for the declarations of the structures
        *  being matched by this calculation.
        */
       int  getTableSize()  {
           if (fRB.fTreeRoots[fRootIx] == null) {
               return 0;
           }
           int size    = 16;    // The header of 4 ints, with no rows to the table.
           int numRows = fDStates.size();
           int numCols = fRB.fSetBuilder.getNumCharCategories();
           int rowSize = 8 + 2*numCols;
           size   += numRows * rowSize;
           size = (size + 7) & ~7;   // round up to a multiple of 8 bytes
           return size;
       }



       /**
        * Create a RBBIDataWrapper.RBBIStateTable for a newly compiled table.
        * RBBIDataWrapper.RBBIStateTable is similar to struct RBBIStateTable in ICU4C,
        * in common/rbbidata.h
        */
       RBBIDataWrapper.RBBIStateTable exportTable() {
           int                state;
           int                col;

           RBBIDataWrapper.RBBIStateTable table = new RBBIDataWrapper.RBBIStateTable();
           if (fRB.fTreeRoots[fRootIx] == null) {
               return table;
           }

           Assert.assrt(fRB.fSetBuilder.getNumCharCategories() < 0x7fff &&
               fDStates.size() < 0x7fff);
           table.fNumStates = fDStates.size();

           // Size of table size in shorts.
           //  the "4" is the size of struct RBBIStateTableRow, the row header part only.
           int rowLen = 4 + fRB.fSetBuilder.getNumCharCategories();   // Row Length in shorts.
           int tableSize = (getTableSize() - 16) / 2;       // fTable length in shorts.
           table.fTable = new short[tableSize];
           table.fRowLen = rowLen * 2;                      // Row length in bytes.

           if (fRB.fLookAheadHardBreak) {
               table.fFlags  |= RBBIDataWrapper.RBBI_LOOKAHEAD_HARD_BREAK;
           }
           if (fRB.fSetBuilder.sawBOF()) {
               table.fFlags  |= RBBIDataWrapper.RBBI_BOF_REQUIRED;
           }

           int numCharCategories = fRB.fSetBuilder.getNumCharCategories();
           for (state=0; state<table.fNumStates; state++) {
               RBBIStateDescriptor sd = fDStates.get(state);
               int row = state*rowLen;
               Assert.assrt (-32768 < sd.fAccepting && sd.fAccepting <= 32767);
               Assert.assrt (-32768 < sd.fLookAhead && sd.fLookAhead <= 32767);
               table.fTable[row + RBBIDataWrapper.ACCEPTING] = (short)sd.fAccepting;
               table.fTable[row + RBBIDataWrapper.LOOKAHEAD] = (short)sd.fLookAhead;
               table.fTable[row + RBBIDataWrapper.TAGIDX]    = (short)sd.fTagsIdx;
               for (col=0; col<numCharCategories; col++) {
                   table.fTable[row + RBBIDataWrapper.NEXTSTATES + col] = (short)sd.fDtran[col];
               }
           }
           return table;
       }

       /**
        *   Synthesize a safe state table from the main state table.
        */
       void buildSafeReverseTable() {
           // Safe Reverse table construction is described in more detail in the corresponding
           // function in ICU4C, in source/common/rbbitblb.cpp. Not duplicated here because
           // it is too likely to get out of sync.

           // Each safe pair is stored as two chars in the safePair stringBuilder.
           StringBuilder safePairs = new StringBuilder();

           int numCharClasses = fRB.fSetBuilder.getNumCharCategories();
           int numStates = fDStates.size();

           for (int c1=0; c1<numCharClasses; ++c1) {
               for (int c2=0; c2 < numCharClasses; ++c2) {
                   int wantedEndState = -1;
                   int endState = 0;
                   for (int startState = 1; startState < numStates; ++startState) {
                       RBBIStateDescriptor startStateD = fDStates.get(startState);
                       int s2 = startStateD.fDtran[c1];
                       RBBIStateDescriptor s2StateD = fDStates.get(s2);
                       endState = s2StateD.fDtran[c2];
                       if (wantedEndState < 0) {
                           wantedEndState = endState;
                       } else {
                           if (wantedEndState != endState) {
                               break;
                           }
                       }
                   }
                   if (wantedEndState == endState) {
                       safePairs.append((char)c1);
                       safePairs.append((char)c2);
                       // System.out.printf("(%d, %d) ", c1, c2);
                   }
               }
               // System.out.printf("\n");
           }

           // Populate the initial safe table.
           // The table as a whole is a List<short[]>
           // Row 0 is the stop state.
           // Row 1 is the start sate.
           // Row 2 and beyond are other states, initially one per char class, but
           //   after initial construction, many of the states will be combined, compacting the table.)
           // The String holds the nextState data only. The four leading fields of a row, fAccepting,
           // fLookAhead, etc. are not needed for the safe table, and are omitted at this stage of building.

           assert(fSafeTable == null);
           fSafeTable = new ArrayList<>();
           for (int row=0; row<numCharClasses + 2; ++row) {
               fSafeTable.add(new short[numCharClasses]);
           }

           // From the start state, each input char class transitions to the state for that input.
           short[] startState = fSafeTable.get(1);
           for (int charClass=0; charClass < numCharClasses; ++charClass) {
               // Note: +2 to skip the start & stop state rows.
               startState[charClass] = (short)(charClass+2);
           }

           // Initially make every other state table row look like the start state row
           //    (except for the stop state, which remains all 0)
           for (int row=2; row<numCharClasses+2; ++row) {
               System.arraycopy(startState, 0, fSafeTable.get(row), 0, startState.length);
           }

           // Run through the safe pairs, set the next state to zero when pair has been seen.
           // Zero being the stop state, meaning we found a safe point.
           for (int pairIdx=0; pairIdx<safePairs.length(); pairIdx+=2) {
               int c1 = safePairs.charAt(pairIdx);
               int c2 = safePairs.charAt(pairIdx + 1);

               short[] rowState = fSafeTable.get(c2 + 2);
               rowState[c1] = 0;
           }

           // Remove duplicate or redundant rows from the table.
           RBBIRuleBuilder.IntPair states = new RBBIRuleBuilder.IntPair(1, 0);
           while (findDuplicateSafeState(states)) {
               // System.out.printf("Removing duplicate safe states (%d, %d)\n", states.first, states.second);
               removeSafeState(states);
           }
       }


       /**
        *  Calculate the size of the runtime form of this safe state table.
        */
       int getSafeTableSize() {
           if (fSafeTable == null) {
               return 0;
           }
           int size    = 16;    // The header of 4 ints, with no rows to the table.
           int numRows = fSafeTable.size();
           int numCols = fSafeTable.get(0).length;
           int rowSize = 8 + 2*numCols;
           size += numRows * rowSize;
           // TODO: there are redundant round-up. Figure out best place, get rid of the rest.
           size = (size + 7) & ~7;   // round up to a multiple of 8 bytes
           return size;
       }


       /**
        *  Create a RBBIDataWrapper.RBBIStateTable for the safe reverse table.
        *  RBBIDataWrapper.RBBIStateTable is similar to struct RBBIStateTable in ICU4C,
        *  in common/rbbidata.h
        */
       RBBIDataWrapper.RBBIStateTable exportSafeTable() {
           RBBIDataWrapper.RBBIStateTable table = new RBBIDataWrapper.RBBIStateTable();
           table.fNumStates = fSafeTable.size();
           int numCharCategories = fSafeTable.get(0).length;

           // Size of table size in shorts.
           //  the "4" is the size of struct RBBIStateTableRow, the row header part only.
           int rowLen = 4 + numCharCategories;
           // TODO: tableSize is basically numStates * numCharCategories,
           //       except for alignment padding. Clean up here, and in main exportTable().
           int tableSize = (getSafeTableSize() - 16) / 2;   // fTable length in shorts.
           table.fTable = new short[tableSize];
           table.fRowLen = rowLen * 2;                      // Row length in bytes.

           for (int state=0; state<table.fNumStates; state++) {
               short[] rowArray = fSafeTable.get(state);
               int row = state * rowLen;

               for (int col=0; col<numCharCategories; col++) {
                   table.fTable[row + RBBIDataWrapper.NEXTSTATES + col] = rowArray[col];
               }
           }
           return table;
       }


       //-----------------------------------------------------------------------------
       //
       //   printSet    Debug function.   Print the contents of a set of Nodes
       //
       //-----------------------------------------------------------------------------

       void printSet(Collection<RBBINode> s) {
           for (RBBINode n : s) {
               RBBINode.printInt(n.fSerialNum, 8);
           }
           System.out.println();
       }



       //-----------------------------------------------------------------------------
       //
       //   printStates    Debug Function.  Dump the fully constructed state transition table.
       //
       //-----------------------------------------------------------------------------

       void printStates() {
           int     c;    // input "character"
           int     n;    // state number

           System.out.print("state |           i n p u t     s y m b o l s \n");
           System.out.print("      | Acc  LA    Tag");
           for (c=0; c<fRB.fSetBuilder.getNumCharCategories(); c++) {
               RBBINode.printInt(c, 3);
           }
           System.out.print("\n");
           System.out.print("      |---------------");
           for (c=0; c<fRB.fSetBuilder.getNumCharCategories(); c++) {
               System.out.print("---");
           }
           System.out.print("\n");

           for (n=0; n<fDStates.size(); n++) {
               RBBIStateDescriptor sd = fDStates.get(n);
               RBBINode.printInt(n, 5);
               System.out.print(" | ");

               RBBINode.printInt(sd.fAccepting, 3);
               RBBINode.printInt(sd.fLookAhead, 4);
               RBBINode.printInt(sd.fTagsIdx, 6);
               System.out.print(" ");
               for (c=0; c<fRB.fSetBuilder.getNumCharCategories(); c++) {
                   RBBINode.printInt(sd.fDtran[c], 3);
               }
               System.out.print("\n");
           }
           System.out.print("\n\n");
       }


       /**
        * Debug Function.  Dump the fully constructed safe reverse table.
        */
       void printReverseTable() {
           int     c;    // input "character"

           System.out.printf("    Safe Reverse Table \n");
           if (fSafeTable == null) {
               System.out.printf("   --- nullptr ---\n");
               return;
           }
           int numCharCategories = fSafeTable.get(0).length;
           System.out.printf("state |           i n p u t     s y m b o l s \n");
           System.out.printf("      | Acc  LA    Tag");
           for (c=0; c< numCharCategories;  c++) {
               System.out.printf(" %2d", c);
           }
           System.out.printf("\n");
           System.out.printf("      |---------------");
           for (c=0; c<numCharCategories; c++) {
               System.out.printf("---");
           }
           System.out.printf("\n");

           for (int n=0; n<fSafeTable.size(); n++) {
               short rowArray[]  = fSafeTable.get(n);
               System.out.printf("  %3d | " , n);
               System.out.printf("%3d %3d %5d ", 0, 0, 0);  // Accepting, LookAhead, Tags
               for (c=0; c<numCharCategories; c++) {
                   System.out.printf(" %2d", rowArray[c]);
               }
               System.out.printf("\n");
           }
           System.out.printf("\n\n");
       }





       //-----------------------------------------------------------------------------
       //
       //   printRuleStatusTable    Debug Function.  Dump the common rule status table
       //
       //-----------------------------------------------------------------------------

       void printRuleStatusTable() {
           int  thisRecord = 0;
           int  nextRecord = 0;
           int      i;
           List<Integer> tbl = fRB.fRuleStatusVals;

           System.out.print("index |  tags \n");
           System.out.print("-------------------\n");

           while (nextRecord < tbl.size()) {
               thisRecord = nextRecord;
               nextRecord = thisRecord + tbl.get(thisRecord).intValue() + 1;
               RBBINode.printInt(thisRecord, 7);
               for (i=thisRecord+1; i<nextRecord; i++) {
                   int val = tbl.get(i).intValue();
                   RBBINode.printInt(val, 7);
               }
               System.out.print("\n");
           }
           System.out.print("\n\n");
       }



}
